%run set_theme.ipynb
%%html
<style>
/* Slider rail: solid #d8bea1 fill, no outline, 7px tall.
   NOTE(review): !important is presumably needed to override the styling
   Plotly applies to the slider's SVG elements - confirm. */
.slider-container .slider-rail-rect {
fill: #d8bea1 !important;
stroke-width: 0 !important;
height: 7px !important;
}
/* Slider grip: dark #343a42 fill, no outline, with a soft drop shadow. */
.slider-grip-rect {
fill: #343a42 !important;
stroke-width: 0 !important;
filter: drop-shadow(0 0 3px rgba(0, 0, 0, 0.3));
}
</style>
import pandas as pd
import plotly.graph_objs as go
import plotly.colors as pc
from plotly.offline import init_notebook_mode
init_notebook_mode()
# Load the survey data and keep only the rows whose salary lies strictly
# between 0 and 250,000.
df = pd.read_parquet('../data/SO_2014_2022.pq')
df = df.loc[lambda frame: (frame['Salary'] > 0) & (frame['Salary'] < 250000)]
# Preview the first rows of the filtered frame.
df.head()
| | Year | Salary | JobSat | YearsCode | YearsCodePro | Age | Education | OrgSize | LastNewJob | Employment | RespondentType | JobSeek | Gender | Student | Country | CodingActivities | DevType | LearnCodeFrom | LangPresent |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 69318.0 | <NA> | 10 | 5 | 25-34 | master | 500 to 999 employees | <NA> | fulltime | dev | <NA> | male | no | Germany | School or academic work | Data scientist or machine learning specialist;... | Books / Physical media;School (i.e., Universit... | C;C++;Java;JavaScript;MATLAB;Python;Scala;SQL;... |
| 6 | 2022 | 27652.0 | <NA> | 18 | 10 | 25-34 | bachelor | 1,000 to 4,999 employees | <NA> | fulltime | dev | <NA> | male | no | Colombia | Hobby | Developer, full-stack;Developer, back-end | Books / Physical media;Other online resources ... | Bash/Shell/PowerShell;Elixir;HTML/CSS;JavaScri... |
| 9 | 2022 | 15431.0 | <NA> | 5 | 5 | 25-34 | bachelor | 20 to 99 employees | <NA> | fulltime | dev | <NA> | male | no | Ghana | Freelance/contract work | Developer, back-end | On the job training;Coding Bootcamp | JavaScript;Ruby |
| 13 | 2022 | 47352.0 | <NA> | 7 | 7 | 45-54 | master | 10 to 19 employees | <NA> | fulltime | non-dev | <NA> | male | no | Belgium | Hobby | Developer, back-end;Educator or academic;Datab... | Books / Physical media;On the job training;Col... | Delphi;SQL |
| 22 | 2022 | 78084.0 | <NA> | 25 | 25 | 45-54 | bachelor | 500 to 999 employees | <NA> | fulltime | non-dev | <NA> | male | no | Canada | Hobby;Contribute to open-source projects | Engineer, site reliability;Security professional | Books / Physical media;Other online resources ... | Bash/Shell/PowerShell;C;JavaScript;Perl;PHP;Py... |
def _mean_salary_by_country(data, gender, age_range, column):
    """Return the mean salary per country for one gender and age range.

    The averaged salary is stored in a column named ``column``.
    """
    # Boolean masks instead of an interpolated query string, so a value
    # containing quotes cannot break the filter expression.
    subset = data[(data['Gender'] == gender) & (data['Age'] == age_range)]
    return (
        subset.groupby(['Country'], as_index=False)
        .agg({'Salary': 'mean'})
        .rename(columns={'Salary': column})
    )


def get_salary_gap_by_age(age_range: str, data: 'pd.DataFrame | None' = None) -> pd.DataFrame:
    """Compute the mean salary gap between men and women per country.

    Args:
        age_range: Value of the ``Age`` column to restrict the data to.
        data: Frame with ``Gender``, ``Age``, ``Country`` and ``Salary``
            columns. Defaults to the module-level ``df``.

    Returns:
        One row per country that has both male and female respondents in the
        given age range, with the columns ``Country``, ``SalaryMen``,
        ``SalaryWomen``, ``SalaryGap`` (absolute difference of the means) and
        ``SalaryGapPercent`` (signed difference as a percentage of the male
        mean; positive when men earn more).
    """
    source = df if data is None else data

    men = _mean_salary_by_country(source, 'male', age_range, 'SalaryMen')
    women = _mean_salary_by_country(source, 'female', age_range, 'SalaryWomen')

    # Default inner merge: countries missing either gender are dropped.
    gap_df = pd.merge(men, women, on='Country')
    signed_gap = gap_df['SalaryMen'] - gap_df['SalaryWomen']
    gap_df['SalaryGap'] = signed_gap.abs()
    gap_df['SalaryGapPercent'] = signed_gap / gap_df['SalaryMen'] * 100
    return gap_df
def make_title(age_range: str) -> str:
    """Build the figure title (headline plus subtitle) for an age range."""
    headline = f'Global Salary Gap Distribution Between Men and Women ({age_range} years old)'
    # The subtitle is rendered on its own line in smaller type.
    subtitle = '<br><sup>In most countries, a neutral or female-favoured pay gap turns male favoured at ages 35+</sup>'
    return f'{headline}{subtitle}'
# Age bins present in the data, with missing values dropped, in sorted order.
age_bins = df['Age'].unique().dropna().sort_values()

# Build one choropleth map per age bin; only the first is visible initially.
traces = []
for idx, age in enumerate(age_bins):
    gap_df = get_salary_gap_by_age(age)

    # Horizontal colour bar whose bottom centre sits at (0.5, 0.92) in paper
    # coordinates, labelled with words rather than numbers.
    gap_colorbar = go.choropleth.ColorBar(
        orientation='h',
        x=0.5,
        y=0.92,
        xref='paper',
        yref='paper',
        xanchor='center',
        yanchor='bottom',
        len=0.5,
        thickness=10,
        title='',
        tickmode='array',
        tickvals=[-100, 0, 100],
        ticktext=['female-favoured', 'neutral', 'male-favoured'],
    )

    traces.append(go.Choropleth(
        locations=gap_df['Country'],
        locationmode='country names',
        z=gap_df['SalaryGapPercent'],
        zmin=-100,
        zmax=100,
        # Colour stops are fractions of the zmin..zmax range, so the narrow
        # 0.47-0.53 band covers gaps within roughly +/-6%.
        colorscale=[[0, '#f222e5'], [0.47, '#f283eb'], [0.4701, '#d7f2b2'],
                    [0.5299, '#b2f2c6'], [0.53, '#9893ff'], [1, '#362cff']],
        colorbar=gap_colorbar,
        hovertemplate='<b>%{location}</b><br>Salary gap: %{z:.1f}%<extra></extra>',
        visible=idx == 0,
    ))
# One slider step per age bin: show only the matching trace and swap the title.
slider_steps = [
    {
        'method': 'update',
        'args': [
            {'visible': [trace_idx == step_idx for trace_idx in range(len(traces))]},
            {'title': make_title(age)},
        ],
        'label': age,
    }
    for step_idx, age in enumerate(age_bins)
]
# Assemble the figure: every age trace plus a layout carrying the age slider.
fig = go.Figure(
    data=traces,
    layout=go.Layout(
        title=make_title(age_bins[0]),  # matches the initially visible trace
        width=790,
        height=640,
        margin=dict(t=80, r=20, b=80, l=20),
        geo=dict(showocean=True, oceancolor='#a8d5f2', landcolor='#ffffff'),
        sliders=[dict(
            active=0,
            currentvalue=dict(prefix='Selected age: '),
            steps=slider_steps,
        )],
    ),
)
# Map styling: draw country borders, hide coastlines.
fig.update_geos(showcoastlines=False, showcountries=True)

# Caption with an arrow, positioned in paper coordinates above the slider.
fig.add_annotation(
    text='Drag the slider to different ages to see the salary gap impact',
    xref='paper',
    yref='paper',
    x=0.3,
    y=-0.09,
    xanchor='left',
    yanchor='bottom',
    ax=30,
    ay=-20,
    arrowhead=4,
    arrowwidth=2,
)

fig.show()